
****************************************
*         REPLICATION FILE             *
* Buhaug, Croicu, Fjelde & von Uexkull *
* 'A Conditional Model of Local Income *
*     Shock and Civil Conflict'        *
*       Journal of Politics            *
*                                      *
* For description of measurements and  *
* data sources, see article and online *
* supplementary material; dataset      *    
* generated and analyzed in Stata 14   *
*       Last run: 19 July 2019         *
*                                      *
*  Contact details: halvard@prio.org   *
****************************************



*** MODELS IN ARTICLE ***

use BCFvU_JOP_repdata.dta, clear
tsset gwgroupid year

** Table 1. Drought and ethnic conflict onset - full sample, 1971-2013
* M1.1
eststo clear
eststo: melogit onsetx l.avg_spei15 status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 || gwid:, nolog 
estat ic
predict p1 if e(sample), pr
roctab onsetx p1
* M1.2
eststo: melogit onsetx c.l.avg_spei15##status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 || gwid:, nolog
estat ic
predict p2 if e(sample), pr
roctab onsetx p2
* M1.3
eststo: melogit onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 || gwid:, nolog
estat ic
predict p3 if e(sample), pr
roctab onsetx p3

esttab using table1.rtf, replace se nolines noeqlines title ("Table 1. Full sample") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01) note("Two-level random effects logit estimates with standard errors in parentheses.")
drop p1 p2 p3




** Table 2a. Drought and ethnic conflict onset - low group development subsample, 1971-2013
* inclusion criterion: group mean luminosity in time-series below median value 

sum mlight_cap if sample==1, d
* M2.1
eststo clear
eststo: melogit onsetx l.avg_spei15 status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 & mlight_cap<0.0155 || gwid:, nolog
estat ic
predict p1 if e(sample), pr
roctab onsetx p1
* M2.2
eststo: melogit onsetx c.l.avg_spei15##status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 & mlight_cap<0.0155 || gwid:, nolog
estat ic
predict p2 if e(sample), pr
roctab onsetx p2
* M2.3
eststo: melogit onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 & mlight_cap<0.0155 || gwid:, nolog
estat ic
predict p3 if e(sample), pr
roctab onsetx p3

esttab using table2a.rtf, replace se nolines noeqlines title ("Table 2a. Below-median light per capita subsample") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01) note("Two-level random effects logit estimates with standard errors in parentheses.")
drop p1 p2 p3


** Table 2b. Drought and ethnic conflict onset -  high agricultural dependence subsample, 1971-2013
* inclusion criterion: groups in countries with mean employment in agriculture above 30% (roughly equal to global country mean)
* M2.4
eststo clear
eststo: melogit onsetx l.avg_spei15 status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 & mean_c_agempll>30 || gwid:, nolog
estat ic
predict p1 if e(sample), pr
roctab onsetx p1
* M2.5
eststo: melogit onsetx c.l.avg_spei15##status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 & mean_c_agempll>30 || gwid:, nolog
estat ic
predict p2 if e(sample), pr
roctab onsetx p2
* M2.6
eststo: melogit onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 & mean_c_agempll>30 || gwid:, nolog
estat ic
predict p3 if e(sample), pr
roctab onsetx p3

esttab using table2b.rtf, replace se nolines noeqlines title ("Table 2b. High ag. employment subsample") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01)  note("Two-level random effects logit estimates with standard errors in parentheses.")
drop p1 p2 p3


** Figure 1 generated in ArcMap from GeoEPR 2014 shapefiles (https://icr.ethz.ch/data/epr/geoepr/2014.html)


** Figure 2 generated in R, based on estimation of marginal effects in Tables 1-2:
* rescale variable so that calculated change = from 0 to 75pct 
gen avg_spei15x10=avg_spei15*10

* scenario: median (binary) or mean
* Models 1.1 - 1.3 (full sample --> Full.csv)
qui melogit onsetx l.avg_spei15x10 status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 || gwid:, nolog 
margins, dydx(l.avg_spei15x10) at((means) l.avg_spei15x10=0 downgraded10=0 status_disc=0 postcoldwar=1) 

qui melogit onsetx c.l.avg_spei15x10##status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 || gwid:, nolog
margins, dydx(l.avg_spei15x10) by(status_disc) at((means) l.avg_spei15x10=0 downgraded10=0 postcoldwar=1) 

qui melogit onsetx c.l.avg_spei15x10##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 || gwid:, nolog
margins, dydx(l.avg_spei15x10) by(downgraded10) at((means) l.avg_spei15x10=0 status_disc=0 postcoldwar=1) 

* Models 2.1 - 2.3 (low development subsample --> Low_dev.csv)
qui melogit onsetx l.avg_spei15x10 status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 & mlight_cap<0.0155 || gwid:, nolog
margins, dydx(l.avg_spei15x10) at((means) l.avg_spei15x10=0 downgraded10=0 status_disc=0 postcoldwar=1) 

qui melogit onsetx c.l.avg_spei15x10##status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 & mlight_cap<0.0155 || gwid:, nolog
margins, dydx(l.avg_spei15x10) by(status_disc) at((means) l.avg_spei15x10=0 downgraded10=0 postcoldwar=1) 
  
qui melogit onsetx c.l.avg_spei15x10##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 & mlight_cap<0.0155 || gwid:, nolog
margins, dydx(l.avg_spei15x10) by(downgraded10) at((means) l.avg_spei15x10=0 status_disc=0 postcoldwar=1) 

* Models 2.4 - 2.6 (High ag. dependence subsample --> Ag_dep.csv)
qui melogit onsetx l.avg_spei15x10 status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 & mean_c_agempll>30 || gwid:, nolog
margins, dydx(l.avg_spei15x10) at((means) l.avg_spei15x10=0 downgraded10=0 status_disc=0 postcoldwar=1) 

qui melogit onsetx c.l.avg_spei15x10##status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 & mean_c_agempll>30|| gwid:, nolog
margins, dydx(l.avg_spei15x10) by(status_disc) at((means) l.avg_spei15x10=0 downgraded10=0 postcoldwar=1) 

qui melogit onsetx c.l.avg_spei15x10##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 & mean_c_agempll>30 || gwid:, nolog
margins, dydx(l.avg_spei15x10) by(downgraded10) at((means) l.avg_spei15x10=0 status_disc=0 postcoldwar=1) 

* all margins estimates (dy/dx and 95% CI) pasted and save as .csv files'
* then imported into R to create Fig. 2 (see separate R file)






*** SUPPLEMENTARY MATERIAL ***

*** A. Descriptives

use BCFvU_JOP_repdata, clear
tsset gwgroupid year


** Table A1 variable operationalization is text only


** Table A2 descriptive statistics for valid global sample 
qui melogit onsetx l.avg_spei15 status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 || gwid:, nolog 
outreg2 using descriptives if e(sample), replace sum(log) keep(onsetx avg_spei15 status_disc downgraded10 postcoldwar ///
lpeaceyrs n1 ln_nl_cap c_agempl cropland lgrppop c_lgdpcap egip_pct status_excl v2x_polyarchy v2x_poly2 c_lpop lotherEPR_sb avg_spei) word


** Table A3 correlation matrix
qui melogit onsetx l.avg_spei15 status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 || gwid:, nolog 
gen lavg_spei15=l.avg_spei15
estpost corr onsetx lavg_spei15 status_disc downgraded10 postcoldwar lpeaceyrs n1 mlight_cap mean_c_agempll if e(sample), matrix 
eststo TableA3
esttab TableA3 using TableA3.rtf, unstack compress b(3) replace


** Figure A1 historgram of spei
hist avg_spei15 if sample==1, percent ytitle(Percent) xtitle(g_shock) xsize(5.5) ysize(5) 


** Figure A2 map of low light/cap groups
sum mlight_cap if sample==1, d // median = 0.0155
gen mlight_cap50=0 if mlight_cap>=0.0155 & mlight_cap!=. & sample==1 
replace mlight_cap50=1 if mlight_cap<0.0155 & sample==1 
lab var mlight_cap50 "1 for groups with below-median light/capita 1992-2012"

keep gwgroupid mlight_cap50
collapse (mean) mlight_cap50, by(gwgroupid)
sort gwgroupid
save nlight_for_map.dta, replace
* merge group-specific nlight data with GeoEPR groups and convert to .dbf to make map in ArcMap


** Figure A3 map of grps in high ag.dep subsample
use BCFvU_JOP_repdata, clear
sum mean_c_agempll if sample==1, d // median = 31.7, rounded downwards to 30%
gen agdep30=0
replace agdep30=1 if mean_c_agempll>30
replace agdep30=. if sample!=1
keep gwgroupid agdep30
collapse (mean) agdep30, by(gwgroupid)
sort gwgroupid
save agdep_for_map.dta, replace
* merge with GeoEPR 2014 groups as .dbf to make map in ArcMap



*** B. Validations

** Table B1 country-year validation: drought --> economic loss
use BCFvU_JOP_cntryval.dta, clear
tsset gwid year

sum mc_agempl // mean = 29.61, rounded to 30%
eststo clear
eststo: xtreg gdpgrowthx l.avg_spei15x avg_spei15x f.avg_spei15x i.year, fe
eststo: xtreg gdpgrowthx l.avg_spei15x avg_spei15x f.avg_spei15x i.year if mc_agempl>30, fe // ag.dep subsample

esttab using tableB1.rtf,  replace se nolines noeqlines title ("Table B1. Drought and GDP growth") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01) note("Country fixed-effects OLS estimates with standard errors in parentheses.")


** Table B2 group-level validation: drought --> light emission loss
use BCFvU_JOP_repdata, clear
tsset gwgroupid year

sum mlight_cap if sample==1, d // median = 0.0155 
eststo clear
pwcorr l.avg_spei15 avg_spei15 f.avg_spei15 if sample==1
eststo: xtreg lightgrowthx l.avg_spei15 avg_spei15 f.avg_spei15 n1 y19* y20* if sample==1, fe
eststo: xtreg lightgrowthx l.avg_spei15 avg_spei15 f.avg_spei15 n1 y19* y20* if mlight_cap<0.0155 & sample==1, fe
eststo: xtreg lightgrowthx l.avg_spei15 avg_spei15 f.avg_spei15 n1 y19* y20* if mean_c_agempll>30 & sample==1, fe 

esttab using tableB2.rtf, replace se nolines noeqlines title ("Table B2. Drought and nightlight growth") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01) note("Group fixed-effects OLS estimates with standard errors in parentheses.")


** Figure B1 validation: nightlight correlates with development
scatter c_lnlight_cap c_urbanl if year==2000, ytitle("Light emission per capita, ln") ytitle(, size(large)) ///
ylabel(, labsize(large)) xtitle("Urbanization, %") xtitle(, size(large)) xlabel(, labsize(large)) xsize(5.5) ysize(5)

scatter c_lnlight_cap c_lgdpcapl if year==2000, ytitle("Light emission per capita, ln") ytitle(, size(large)) ///
ylabel(, labsize(large)) xtitle("GDP per capita, ln") xtitle(, size(large)) xlabel(, labsize(large)) xsize(5.5) ysize(5)

scatter c_lnlight_cap c_agempl if year==2000, ytitle("Light emission per capita, ln") ytitle(, size(large)) ///
ylabel(, labsize(large)) xtitle("Ag. employment, %") xtitle(, size(large)) xlabel(, labsize(large)) xsize(5.5) ysize(5)


** Table B3 GDP growth and light emission growth
use BCFvU_JOP_cntryval.dta, clear
tsset gwid year

eststo clear
eststo: xtreg lightgrowthx l.gdpgrowthx gdpgrowthx f.gdpgrowthx n1 y19* y20*, fe 
eststo: xtreg lightgrowthx l.gdpgrowthx gdpgrowthx f.gdpgrowthx n1 y19* y20* if mc_agempl>30, fe 

esttab using tableB3.rtf, replace se nolines noeqlines title ("Table B3. GDP growth and luminosity growth") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01) note("Country fixed-effects OLS estimates with standard errors in parentheses.")


** Figure B2 histogram of more inclusive spei 1.0
use BCFvU_JOP_repdata, clear
tsset gwgroupid year

hist avg_spei if sample==1, percent ytitle(Percent) xtitle("g_shock_1.0") xsize(5.5) ysize(5)


** Table B4 replication of Table 2 in article with spei 1.0 instead of 1.5
eststo clear
eststo: xtreg lightgrowthx l.avg_spei avg_spei f.avg_spei n1 y19* y20* if sample==1, fe
eststo: xtreg lightgrowthx l.avg_spei avg_spei f.avg_spei n1 y19* y20* if mlight_cap<0.0155 & sample==1, fe
eststo: xtreg lightgrowthx l.avg_spei avg_spei f.avg_spei n1 y19* y20* if mean_c_agempll>31.70536 & sample==1, fe 

esttab using tableB4.rtf, replace se nolines noeqlines title ("Table B4. Drought and nightlight growth with SPEI 1.0") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01) note("Group fixed-effects OLS estimates with standard errors in parentheses.")



*** C. Alternative model specifications

** Table C1 1ist of most influential cases - Model 5.3 (using OLS w/ country dummies instead of melogit to obtain dfbetas)
eststo clear
qui reg onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs n1 i.gwid if sample==1
dfbeta l.avg_spei15
gen outl_1 = 0 if  e(sample)
replace outl_1=1 if abs(_dfbeta_1) > 2/sqrt(15145)  & _dfbeta_1!=.

l state group onsetx year l.avg_spei15 downgraded10 _dfbeta_1 if _dfbeta_1>0.2 & _dfbeta_1!=.
drop _df* 


** Table C2 exclude obs with high dfbetas values
eststo: melogit onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 & outl_1==0 || gwid:, nolog
estat ic
predict p1 if e(sample), xb
roctab onsetx p1

qui reg onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs n1 i.gwid if sample==1 & mlight_cap<0.0155
dfbeta l.avg_spei15
gen outl_2 = 0 if  e(sample)
replace outl_2=1 if abs(_dfbeta_1) > 2/sqrt(7096)  & _dfbeta_1!=.
* exclude obs with high dfbetas values
eststo: melogit onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1  & mlight_cap<0.0155 & outl_2==0 || gwid:, nolog
estat ic
predict p2 if e(sample), xb
roctab onsetx p2
drop _df* 

qui reg onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs n1 i.gwid if sample==1 & mean_c_agempll>30
dfbeta l.avg_spei15
gen outl_3 = 0 if  e(sample)
replace outl_3=1 if abs(_dfbeta_1) > 2/sqrt(8830)  & _dfbeta_1!=.
* exclude obs with high dfbetas values
eststo: melogit onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 & mean_c_agempll>30 & outl_3==0 || gwid:, nolog
estat ic
predict p3 if e(sample), xb
roctab onsetx p3

esttab using tableC1.rtf, replace se nolines noeqlines title ("Table C2. Excluding influential observations") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01)  note("Two-level random effects logit estimates with standard errors in parentheses.")
drop p1 p2 p3 _df* outl*


** Table C3 spei 1.0 instead of spei 1.5 
qui melogit onsetx l.avg_spei15 status_disc downgraded10 postcoldwar lpeaceyrs n1 if sample==1 || gwid:, nolog 
corr avg_spei15 avg_spei if e(sample)

eststo clear
eststo: melogit onsetx c.l.avg_spei##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 || gwid:, nolog
estat ic
predict p1 if e(sample), pr
roctab onsetx p1

eststo: melogit onsetx c.l.avg_spei##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 & mlight_cap<0.0155 || gwid:, nolog
estat ic
predict p2 if e(sample), pr
roctab onsetx p2

eststo: melogit onsetx c.l.avg_spei##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 & mean_c_agempll>30 || gwid:, nolog
estat ic
predict p3 if e(sample), pr
roctab onsetx p3

esttab using tableC3.rtf, replace se nolines noeqlines title ("Table C3. Ethnic conflict onset with SPEI 1.0 instead of 1.5") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01) note("Two-level random effects logit estimates with standard errors in parentheses.")
drop p1 p2 p3



** Table C4 exclusion instead of discrimination as conditioning factor
eststo clear
eststo: melogit onsetx c.l.avg_spei15##status_excl downgraded10 postcoldwar lpeaceyrs n1 if sample==1 || gwid:, nolog
estat ic
predict p1 if e(sample), pr
roctab onsetx p1

eststo: melogit onsetx c.l.avg_spei15##status_excl downgraded10 postcoldwar lpeaceyrs n1 if sample==1 & mlight_cap<0.0155 || gwid:, nolog
estat ic
predict p2 if e(sample), pr
roctab onsetx p2

eststo: melogit onsetx c.l.avg_spei15##status_excl downgraded10 postcoldwar lpeaceyrs n1 if sample==1 & mean_c_agempll>30 || gwid:, nolog
estat ic
predict p3 if e(sample), pr
roctab onsetx p3

esttab using tableC4.rtf, replace se nolines noeqlines title ("Table C4. Ethnic conflict onset with excluded instead of dicriminated") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01) note("Two-level random effects logit estimates with standard errors in parentheses.")
drop p1 p2 p3


** Table C5 a greater selection of controls
eststo clear
eststo: melogit onsetx c.l.avg_spei15##downgraded10 status_disc cropland l.lgrppop c_lgdpcapl egip_pctl l.v2x_polyarchy l.v2x_poly2 ///
c_lpopl postcoldwar lpeaceyrs n1 if sample==1 || gwid:, nolog
estat ic
predict p1 if e(sample), pr
roctab onsetx p1

eststo: melogit onsetx c.l.avg_spei15##downgraded10 status_disc cropland l.lgrppop c_lgdpcapl egip_pctl l.v2x_polyarchy l.v2x_poly2 ///
c_lpopl postcoldwar lpeaceyrs n1 if sample==1 & mlight_cap<0.0155 || gwid:, nolog
estat ic
predict p2 if e(sample), pr
roctab onsetx p2

eststo: melogit onsetx c.l.avg_spei15##downgraded10 status_disc cropland l.lgrppop c_lgdpcapl egip_pctl l.v2x_polyarchy l.v2x_poly2 ///
c_lpopl postcoldwar lpeaceyrs n1 if sample==1 & mean_c_agempll>30 || gwid:, nolog
estat ic
predict p3 if e(sample), pr
roctab onsetx p3

esttab using tableC5.rtf, replace se nolines noeqlines title ("Table C5. Additional controls") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01) note("Two-level random effects logit estimates with standard errors in parentheses.")
drop p1 p2 p3


** Table C6 country fixed-effects OLS  
eststo clear
eststo: xtreg onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs i.year if sample==1, fe cl(gwid) 
estat ic
predict p1 if e(sample), xb
roctab onsetx p1

eststo: xtreg onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs i.year if sample==1 & mlight_cap<0.0155, fe cl(gwid)
estat ic
predict p2 if e(sample), xb
roctab onsetx p2

eststo: xtreg onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs i.year if sample==1 & mean_c_agempll>30, fe cl(gwid)
estat ic
predict p3 if e(sample), xb
roctab onsetx p3

esttab using tableC6.rtf, replace se nolines noeqlines title ("Table C6. OLS with group and year FE") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01) note("OLS estimates with group and year fixed-effects and standard errors clustered on countries in parentheses.")
drop p1 p2 p3


** Table C7 conditional (fixed-effects) logit
eststo clear
eststo: clogit onsetx c.l.avg_spei15##downgraded10 status_disc lpeaceyrs i.year if sample==1, group(gwgroupid) nolog 
estat ic
predict p1 if e(sample), xb
roctab onsetx p1

eststo: clogit onsetx c.l.avg_spei15##downgraded10 status_disc lpeaceyrs i.year if sample==1 & mlight_cap<0.0155, group(gwgroupid) nolog 
estat ic
predict p2 if e(sample), xb
roctab onsetx p2

eststo: clogit onsetx c.l.avg_spei15##downgraded10 status_disc lpeaceyrs i.year if sample==1 & mean_c_agempll>30, group(gwgroupid) nolog 
estat ic
predict p3 if e(sample), xb
roctab onsetx p3

esttab using tableC7.rtf, replace se nolines noeqlines title ("Table C7. Conditional logit") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01) note("Conditional (group fixed effects) logit estimates with standard errors in parentheses.")
drop p1 p2 p3


** Table C8 alternative subsamples: lowest quartile light/capita and >50% ag. employment
sum mlight_cap if sample==1, d

eststo clear
eststo: melogit onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 & mlight_cap<0.0032 || gwid:, nolog
estat ic
predict p1 if e(sample), pr
roctab onsetx p1

eststo: melogit onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 & mean_c_agempll>50 || gwid:, nolog
estat ic
predict p2 if e(sample), pr
roctab onsetx p2

esttab using tableC8.rtf, replace se nolines noeqlines title ("Table C8. Alternative subsample thresholds") mtitles ("") nolabel  ///
onecell star(+ 0.10 * 0.05 ** 0.01) note("Two-level random effects logit estimates with standard errors in parentheses.")
drop p1 p2


** Table C9 5-fold out-of-sample cross-validation
* set aside 20%, train on the remaining 80%, forecast + repeat four times, changing the 20% prediction sample
* then repeat everything 10 times over different seed randomizations, totalling 50 permutations
* point of departure: Model 1.3, compare with model w/o interaction and model w/o l.avg_spei15

forvalues seed = 1(1)10 {
bysort eprgid: gen randnum`seed' = rnormal()
bysort eprgid: replace randnum`seed' = randnum`seed'[1]
egen fold`seed' = cut(randnum`seed'), group(5)
}

local vlistXX
set graphics off
tsset gwgroupid year

qui melogit onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs n1 if sample==1 || gwid:, nolog
gen samplecv=e(sample)

set seed 586358

forvalues seed = 1(1)10 {
    display "Now doing iteration: `seed'/10"
    forvalues current_fold = 0(1)4 {
        display "k-fold is: `current_fold' (out of 5-folds)"

        display "Model I full specification : "
        qui logit onsetx c.l.avg_spei15##downgraded10 status_disc postcoldwar lpeaceyrs n1 if samplecv==1 & fold`seed'!=`current_fold', nolog cl(gwgroupid)
        predict fcastI`current_fold'`seed' if fold`seed' == `current_fold' & samplecv==1
		eststo model`current_fold'`seed'

        display "Model II no interaction : "
		qui logit onsetx l.avg_spei15 downgraded10 status_disc postcoldwar lpeaceyrs n1 if samplecv==1 & fold`seed'!=`current_fold', nolog cl(gwgroupid)
        predict fcastII`current_fold'`seed' if fold`seed' == `current_fold' & samplecv==1
		eststo model`current_fold'`seed'

        display "Model III no spei : "
        qui logit onsetx downgraded10 status_disc postcoldwar lpeaceyrs n1 if samplecv==1 & fold`seed'!=`current_fold', nolog cl(gwgroupid)
        predict fcastIII`current_fold'`seed' if fold`seed' == `current_fold' & samplecv==1
        eststo model`current_fold'`seed'

        roctab onsetx fcastI`current_fold'`seed' if fcastIII`current_fold'`seed'+fcastII`current_fold'`seed'+fcastI`current_fold'`seed' != .
        gen rocaucI`current_fold'`seed'=r(area)
        prtab onsetx fcastI`current_fold'`seed' if fcastIII`current_fold'`seed'+fcastII`current_fold'`seed'+fcastI`current_fold'`seed' != .
        gen praaucI`current_fold'`seed'=r(AUC)
        
        roctab onsetx fcastII`current_fold'`seed' if fcastIII`current_fold'`seed'+fcastII`current_fold'`seed'+fcastI`current_fold'`seed' != .
        gen rocaucII`current_fold'`seed'=r(area)
        prtab onsetx fcastII`current_fold'`seed' if fcastIII`current_fold'`seed'+fcastII`current_fold'`seed'+fcastI`current_fold'`seed' != .
        gen praaucII`current_fold'`seed'=r(AUC)

        roctab onsetx fcastIII`current_fold'`seed' if fcastIII`current_fold'`seed'+fcastII`current_fold'`seed'+fcastI`current_fold'`seed' != .
        gen rocaucIII`current_fold'`seed'=r(area)
        prtab onsetx fcastIII`current_fold'`seed' if fcastIII`current_fold'`seed'+fcastII`current_fold'`seed'+fcastI`current_fold'`seed' != .
        gen praaucIII`current_fold'`seed'=r(AUC)

        local vname fcastI`current_fold'`seed' fcastII`current_fold'`seed' fcastIII`current_fold'`seed'
        local vlist `vlist' onsetx `vname'

    }
}

set graphics on 
display "`vlist'"

gen roccumI = 0
gen roccumII = 0
gen roccumIII = 0
gen prauccumI = 0
gen prauccumII = 0
gen prauccumIII = 0

forvalues i=0(1)4 {
     forvalues j=1(1)10 {
           replace roccumI = roccumI + rocaucI`i'`j'
           replace roccumII = roccumII + rocaucII`i'`j'
           replace roccumIII = roccumIII + rocaucIII`i'`j'
           replace prauccumI = prauccumI + praaucI`i'`j'
           replace prauccumII = prauccumII + praaucII`i'`j'
           replace prauccumIII = prauccumIII + praaucIII`i'`j'
     }
}

replace roccumI = roccumI/50
replace roccumII = roccumII/50
replace roccumIII = roccumIII/50
replace prauccumI = prauccumI/50
replace prauccumII = prauccumII/50
replace prauccumIII = prauccumIII/50

estpost summarize rocauc*
estpost summarize praauc*

quietly sum roccumI
display "Cummulative ROC model I: `r(mean)'"
quietly sum roccumII
display "Cummulative ROC model II: `r(mean)'"
quietly sum roccumIII
display "Cummulative ROC model III: `r(mean)'"
quietly sum prauccumI
display "Cummulative PR model I: `r(mean)'"
quietly sum prauccumII
display "Cummulative PR model II: `r(mean)'"
quietly sum prauccumIII
display "Cummulative PR model III: `r(mean)'"


*** End ***
